# coding: utf-8
import re
from docx import Document

# Path of the Word document that is scanned when the file is run as a script.
DOCX_PATH = 'BOT模式介绍.docx'

# Regular-expression patterns used for the extraction.
#
# name_pattern: a run of Chinese characters that follows one of the fixed
# labels.  The look-behind alternatives all have the same length (Python's
# ``re`` requires fixed-width look-behind).  Only the Chinese run is captured,
# so optional whitespace between the label and the name is not part of the
# result.
name_pattern = r'(?<=项目经理|联系人：|收件人：)\s*([\u4e00-\u9fa5]+)'
# phone_number_pattern: 3-4 digits, an optional hyphen and 7-8 digits, or 11
# consecutive digits.  The digit look-arounds stop the pattern from cutting a
# "phone number" out of the middle of a longer digit string.
phone_number_pattern = r'(?<!\d)(?:\d{3,4}-?\d{7,8}|\d{11})(?!\d)'
# address_pattern: Chinese text containing an administrative / street keyword,
# followed by more Chinese characters or digits and an optional trailing "号".
address_pattern = r'[\u4e00-\u9fa5]+(?:省|市|区|路|街|胡同)[\u4e00-\u9fa5\d]+号?'
# organization_pattern: Chinese text ending in one of the listed
# organisation suffixes.
organization_pattern = r'[\u4e00-\u9fa5]+(?:公司|集团|银行|大学|学院|局|中心|协会|研究所)'


def read_docx_text(path: str) -> str:
    """Return the text of every paragraph in the Word document at *path*.

    Paragraphs are joined with a newline so that a pattern made of greedy
    character runs cannot continue from the end of one paragraph into the
    beginning of the next.
    """
    # NOTE(review): only ``doc.paragraphs`` is read; text that lives in
    # tables is presumably not included - confirm whether that is needed.
    doc = Document(path)
    return "\n".join(para.text for para in doc.paragraphs)


def extract_entities(text: str) -> dict:
    """Extract names, phone numbers, addresses and organisations from *text*.

    Returns a dict with the keys ``'names'``, ``'phone_numbers'``,
    ``'addresses'`` and ``'organizations'``; each value is the list of
    matched strings in order of appearance (possibly empty).
    """
    return {
        'names': re.findall(name_pattern, text),
        'phone_numbers': re.findall(phone_number_pattern, text),
        'addresses': re.findall(address_pattern, text),
        'organizations': re.findall(organization_pattern, text),
    }


def main() -> None:
    """Read the document at ``DOCX_PATH`` and print the extracted entities."""
    entities = extract_entities(read_docx_text(DOCX_PATH))
    names = entities['names']
    phone_numbers = entities['phone_numbers']
    addresses = entities['addresses']
    organizations = entities['organizations']

    # Print the match results.
    print(f'人名：{names}')
    print(f'电话号码：{phone_numbers}')
    print(f'地址：{addresses}')
    print(f'机构名：{organizations}')


if __name__ == '__main__':
    main()